kernel_path =  (File.expand_path(__FILE__).split("/"))[0..-2].join("/") + '/'
require kernel_path +'env'
require kernel_path +'pdb_my_lib.rb'
require kernel_path +'log_lib'
require kernel_path +'db_lib'
require kernel_path +'blast_lib'
require kernel_path +'rmsd_lib'

# 0) rsync with pdb  server to get new files
#   0.1) while( rsync.output != <empty>) do rsync - in case we had IO problems - will fetch pdb files, as long as there is delta.
# 
# 1) get sequences of structures from delta_list
# 2) update blast database file.
# 3) after 2 we may run a fast blast query of all the delta-list structures against all other structures. 
# 4) take the hits list from (3), parse it and turn it into a flat, db-friendly csv file
# 5) get 3d structure for all chain pairs
# 6) do TMscore to find rmsd and gdt for them
# 7) update all those pairs in database
# 8) probably rerun clustering and recreate leaders_table

#-------------------------------------------------------------------
# 0) rsync with pdb  server to get new/updated files
# Runs the given rsync command repeatedly and collects the output lines that
# mention both "pdb" and "gz" (filtered by the shell `grep`s below).
#
# The command is re-run until a pass produces no matching line (i.e. there is
# no delta left), so that a pass interrupted by IO problems is followed by
# another one. The previous implementation never re-ran the command inside the
# loop and passed a String to Array#concat, so it either raised TypeError or
# could never terminate.
#
# raw_rsync_cmd - shell command line to execute (interpolated as-is).
# max_passes    - upper bound on the number of passes, so a command that keeps
#                 reporting the same entries cannot loop forever.
#
# Returns an Array of unique output lines (without trailing newlines), in the
# order they were first reported; empty when the first pass reports nothing.
def rsync_with_web(raw_rsync_cmd, max_passes = 16)
	list_of_deltas = []
	max_passes.times do
		lines = %x[#{raw_rsync_cmd} | grep pdb | grep gz].split("\n")
		break if lines.empty?
		list_of_deltas.concat lines
	end
	#build list_of_deltas with full paths TODO
	# The same entry may be reported by several passes (e.g. after a failed transfer).
	return list_of_deltas.uniq
end
#-------------------------------------------------------------------
# For every gzipped pdb file in delta_list: extracts the per-chain sequences,
# saves the CA coordinates as an .rxyz file, stores each usable chain through
# insert_or_update_seqres and appends it to a FASTA file.
#
# delta_list           - Array of paths to gzipped pdb files; the structure name
#                        is the part between the last "/pdb" and ".ent".
# delta_fasta_filename - path of the FASTA file to (over)write.
#
# Files whose extraction or CA export raises are skipped. The error is sent to
# err_log unless its message carries only the '[w]' tag (the warning_log call
# for that case is disabled).
# Errors raised while storing a chain are NOT rescued and propagate to the caller.
#
# Returns the number of chains written.
def update_seqres_and_xyz_data(delta_list, delta_fasta_filename)
	log_me "Size of delta_list: #{delta_list.size}"
	counter = 0
	# Block form: the FASTA file is flushed and closed even when an insert raises
	# (the previous File.new handle leaked in that case).
	File.open(delta_fasta_filename, "w") do |f|
		delta_list.each do |gzipped_pdb|
			log_me counter.to_s if (counter % 1024 == 0)
			name = gzipped_pdb.split("/pdb")[-1].split(".ent")[0]
			# Out-parameters handed to get_seqres_from_atoms_sections; their first
			# elements are read back below (presumably precision and parsed structure).
			arr_for_precision = []
			structure_ref = []
			begin
				hsh = get_seqres_from_atoms_sections gzipped_pdb, arr_for_precision, structure_ref
				save_CAs_from_structure(get_path('rxyz_files')+ '/' + name,'.rxyz', structure_ref[0])
			rescue => err
				hsh = nil
				msg = err.to_s
				# '[e]' and untagged messages are logged; a message tagged only '[w]' is dropped.
				err_log msg if msg.include?('[e]') || !msg.include?('[w]')
			end
			next if ! hsh

			hsh.each_pair do |chain,seqres|
				# Skip chains without an id and sequences shorter than two residues.
				next if !chain or seqres.size < 2
				insert_or_update_seqres name,chain,seqres,arr_for_precision[0]
				counter += 1
				f.puts ">#{name}#{chain}\n#{seqres}"
			end
		end
	end
	return counter
end
#-------------------------------------------------------------------
# Writes every chain yielded by chains_iterate('') to a single FASTA file.
#
# filename - path of the FASTA file to (over)write.
#
# Each yielded (name, chain, sequence) triple becomes a ">namechain" header
# line followed by the sequence line.
#
# Returns the number of chains written.
def build_single_fasta_file_from_db(filename)
	counter = 0
	# Block form: the file is flushed and closed even if chains_iterate raises
	# (the previous File.new handle leaked in that case).
	File.open(filename, "w") do |f|
		chains_iterate('') do |name, chain, sequence|
			counter += 1
			f.puts ">#{name}#{chain}\n#{sequence}"
		end
	end
	return counter
end
#-------------------------------------------------------------------
# Runs the formatdb executable on a FASTA file through the shell.
#
# formatdb_bin - command/path of the formatdb binary.
# storage_path - directory prefix; "/sync_master" is appended and passed as -n.
# fasta_fname  - FASTA file passed as -i.
#
# All arguments are interpolated into the command line unescaped.
# Returns whatever the command printed on standard output.
def update_blast_db(formatdb_bin, storage_path, fasta_fname)
	db_prefix = "#{storage_path}/sync_master"
	command = "#{formatdb_bin}  -t master -i #{fasta_fname} -p T -V -n #{db_prefix}"
	`#{command}`
end
#-------------------------------------------------------------------
# Runs a blastp search through the shell using the blastall executable.
#
# blastall_bin - command/path of the blastall binary.
# blast_db     - database name passed as -d.
# delta_fastas - query FASTA file passed as -i.
#
# The output file is passed as "-o sync_blasto", a relative path and therefore
# dependent on the current working directory. Arguments are interpolated unescaped.
# Returns whatever the command printed on standard output.
def run_blast_on_delta(blastall_bin,blast_db,delta_fastas)
	command = "#{blastall_bin} -p blastp -d #{blast_db} -i #{delta_fastas} -e 0.1 -m 0 -o sync_blasto -F F"
	`#{command}`
end 
#----------------------------------------------
# Flattens a blast output file into one comma-separated line per hit.
#
# blast_output_fname - blast output file, walked with iterate_blast_queries /
#                      iterate_hits_of_single_query.
# limit              - processing stops once the hit counter EXCEEDS this value,
#                      so up to limit + 1 hits are written.
# flat_out_fname     - file to (over)write; defaults to the path that used to be
#                      hard-coded here.
#
# Line layout:
#   l_name,r_name,evalue,length,ident,positives,gaps,left_q,right_q,left_s,right_s,seq_q,seq_s
# where l_name is the second whitespace-separated token of the query text and
# r_name is the hit's subject name; the rest comes from the first element
# returned by parse_hit.
#
# Returns the number of hits processed.
def work_on_new_pairs(blast_output_fname, limit=123456789, flat_out_fname='../sync_flat_blasto')
	columns = [:l_name, :r_name, :evalue, :length, :ident, :positives, :gaps,
	           :left_q, :right_q, :left_s, :right_s, :seq_q, :seq_s]
	counter = 0
	# Block form: the early return below used to skip flush/close, leaking the
	# handle; File.open closes the file on every exit path.
	File.open(flat_out_fname, 'w') do |f|
		iterate_blast_queries(blast_output_fname, -1) do |q|
			q_name = q.split(" ")[1]
			iterate_hits_of_single_query(q) do |h, s_name|
				counter += 1
				hsh = parse_hit(s_name, h)[0]
				hsh[:r_name] = s_name
				hsh[:l_name] = q_name
				f.puts columns.map { |key| hsh[key].to_s }.join(',')
				# The structural scores (rmsd/gdmt*) are not computed here; see work_on_flat_pairs.
				return counter if counter > limit
			end
		end
	end
	return counter
end
#--------------------	
# Reads a comma-separated scores file and stores the TM-score of every pair in
# the blast_pairs table through the global $connection.
#
# scores_file - path of the file; each line is
#                 name,name,rmsd,tmscore[,gdmt1,gdmt2,gdmt3]
#               (column meaning taken from the disabled full UPDATE below).
#
# Lines with fewer than four fields are skipped. The two names are sorted
# before being used as l_name / r_name in the WHERE clause.
# NOTE(review): values are interpolated into the SQL text unescaped, so the
# scores file must be trusted.
#
# Returns nil.
def upload_tmscore_data_from_file(scores_file)
	# File.foreach closes the file when done; the previous File.new handle was never closed.
	File.foreach(scores_file) do |line|
		# chomp first so the newline neither ends up in the last field nor lets
		# "a,b,c,\n" pass the size check with an empty score.
		arr = line.chomp.split(',')
		next if arr.size < 4
		names = [arr[0],arr[1]].sort
		# Only tmscore is uploaded for now; the full statement would be:
		#$connection.query "UPDATE blast_pairs SET rmsd=#{arr[2]}, tmscore=#{arr[3]}, gdmt1=#{arr[4]}, gdmt2=#{arr[5]}, gdmt3=#{arr[6]} WHERE l_name='#{names[0]}' and r_name='#{names[1]}'"
		$connection.query "UPDATE blast_pairs SET tmscore=#{arr[3]} WHERE l_name='#{names[0]}' and r_name='#{names[1]}'"
	end
end
#--------------------	
# Loads a flat blast-pairs file and hands every pair to insert_pair.
#
# blast_flat_pairs - path of the file; each line holds the 13 comma-separated
#                    fields
#   l_name,r_name,evalue,length,ident,positives,gaps,left_q,right_q,left_s,right_s,seq_q,seq_s
#
# Every pair is passed on as a Hash keyed by those names plus the score fields
# :rmsd, :gdmt1, :gdmt2, :gdmt3 and :tmscore, all preset to the string '0'.
# Lines that do not have exactly 13 fields are skipped (same rule as
# work_on_flat_pairs) instead of being inserted with nil fields.
#
# Returns nil.
def upload_flat_blast_pairs(blast_flat_pairs)
	columns = [:l_name, :r_name, :evalue, :length, :ident, :positives, :gaps,
	           :left_q, :right_q, :left_s, :right_s, :seq_q, :seq_s]
	# File.foreach closes the file when done; the previous File.new handle was never closed.
	File.foreach(blast_flat_pairs) do |line|
		# chomp keeps the trailing newline out of seq_s; the -1 limit keeps empty
		# trailing fields so the field count stays meaningful.
		arr = line.chomp.split(',', -1)
		next if arr.size != columns.size
		hsh = Hash[columns.zip(arr)]
		[:rmsd, :gdmt1, :gdmt2, :gdmt3, :tmscore].each { |score| hsh[score] = '0' }
		insert_pair(hsh[:l_name],[hsh])
	end
end
#----------------------------------------------
# Issues the CREATE TABLE statements for `seqres`, `clusters` and `blast_pairs`
# through the global $connection, then calls create_table_leaders_pairs.
#
# Each statement is attempted independently: an error raised by one of them
# (e.g. the table already exists) is passed to log_me and the next statement
# is still executed.
#
# Returns whatever create_table_leaders_pairs returns.
def create_tables
	ddl_statements = [
		"CREATE TABLE `seqres` (
  `pdbname` char(6) NOT NULL,
  `chain` char(1) NOT NULL,
  `seqres` text,
  `prec` float default NULL,
  PRIMARY KEY  (`pdbname`,`chain`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1",
		"CREATE TABLE `clusters` (
  `chain` char(6) NOT NULL,
  `leader` char(6) NOT NULL,
  PRIMARY KEY  (`chain`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1",
		" CREATE TABLE `blast_pairs` (
  `l_name` char(6) NOT NULL,
  `r_name` char(6) NOT NULL,
  `part` int(11) NOT NULL,
  `evalue` char(10) default NULL,
  `length` int(11) NOT NULL,
  `ident` int(10) unsigned NOT NULL,
  `positives` int(10) unsigned NOT NULL,
  `gaps` int(10) unsigned NOT NULL,
  `left_q` int(3) unsigned NOT NULL,
  `right_q` int(3) unsigned NOT NULL,
  `left_s` int(3) unsigned NOT NULL,
  `right_s` int(3) unsigned NOT NULL,
  `seq_q` text NOT NULL,
  `seq_s` text NOT NULL,
  `rmsd` float NOT NULL,
  `gdmt1` float NOT NULL,
  `gdmt2` float NOT NULL,
  `gdmt3` float NOT NULL,
  `tmscore` float NOT NULL,
  PRIMARY KEY  USING BTREE (`l_name`,`r_name`,`part`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
"
	]
	ddl_statements.each do |ddl|
		begin
			$connection.query ddl
		rescue => err
			log_me err
		end
	end
	create_table_leaders_pairs
end
#-------------
# Entry point of the sync pipeline.
#
# Currently active steps: create the tables, list the files under the
# 'seq_files' path, then compute structural scores for the pairs in a
# pre-built flat blast file via work_on_flat_pairs. The intermediate steps
# (sequence extraction, FASTA export, blast db rebuild, blast run, flattening
# and upload) are disabled and kept below so they can be switched back on.
#
# Returns whatever work_on_flat_pairs returns.
def do_full_sync_cycle
#	%x[rm -rf /tmp/sync_*]
	log_me "Create tables"
	create_tables

	log_me "update_seqres_and_xyz_data".tr("_", " ")
	# Only consumed by the disabled update_seqres_and_xyz_data step below.
	delta_list = Dir.glob(get_path('seq_files') + '*/*')
#	counter_chains = update_seqres_and_xyz_data delta_list,'/tmp/sync_deltafasta'
#	log_me "Chains sent to db: #{counter_chains}"
#	log_me "build_single_fasta_file_from_db".gsub("_"," ")
#	counter_chains = build_single_fasta_file_from_db('/tmp/sync_bigfatfasta')
#	log_me "Chains fed to blast: #{counter_chains}"
#	log_me "update_blast_db".gsub("_"," ")
#	update_blast_db(get_path('formatdb'), '/tmp', '/tmp/sync_bigfatfasta')
#	log_me "run_blast_on_delta".gsub("_"," ")
#	run_blast_on_delta(get_path('blastall'),'/tmp/sync_master', '/tmp/sync_deltafasta')

	log_me "work_on_new_pairs".tr("_", " ")
#	pairs_counter = work_on_new_pairs '/home/snepomny/rsync/scripts_rb/sync_blasto'
#	upload_flat_blast_pairs('/home/snepomny/rsync/sync_flat_blasto')
#	log_me "Finished processing #{pairs_counter} pairs"
	work_on_flat_pairs('/home/snepomny/rsync/sync_flat_blasto')
end

#----------------------------------------------
# Computes structural scores for every pair listed in a flat blast file and
# writes them to "<blast_flat_fname>.res".
#
# blast_flat_fname - flat pairs file with 13 comma-separated fields per line.
#                    Fields 0 and 1 are the two names, 7 and 9 the left_q /
#                    left_s offsets, 11 and 12 the two aligned sequences
#                    (layout as written by work_on_new_pairs).
# limit            - processing stops once the line counter EXCEEDS this value.
#
# For each well-formed line rmsd_one_blast_hit is called and its
# whitespace-separated result is written as "name,name,score,score,...".
# Lines without exactly 13 fields are counted but otherwise skipped.
#
# Returns the number of input lines read.
def work_on_flat_pairs(blast_flat_fname, limit=123456789)
	counter = 0
	# Block forms: previously the input handle was never closed and the early
	# return below left the output neither flushed nor closed.
	File.open(blast_flat_fname) do |f|
		File.open(blast_flat_fname+'.res','w') do |out|
			f.each_line do |line|
				counter += 1
				arr = line.split(',')
				next if arr.size != 13
				# strip: the last field still carries the line's newline.
				tmscore_arr = rmsd_one_blast_hit( arr[0], arr[1],arr[7],arr[9],arr[11].strip,arr[12].strip)
				out.puts "#{arr[0]},#{arr[1]},#{tmscore_arr.split().join(',')}"
				return counter if counter>limit
			end
		end
	end
	return counter
end
